#,message=FALSE, warning=FALSE
library(dplyr)
library(ggpubr)
library(ggplot2)
library(lmtest)
library(lubridate)
library(limma)
library(tidyr)
library(plotly)
library(gridExtra)
library(data.table)
library(formattable)
library(readxl)
library(data.table)
#####################
## Import data
#####################
BaseDir <- params$BaseDir#get the root of the directory where the data is stored
#All the Madison data is contained in these two files#MMSD_Cases_processed
path <- paste0(BaseDir, "COVID-19_WastewaterAnalysis/data/processed/WATERMICRO_WW_COVID-2022-01-07_F_v1.1.xlsx")
## Import file
data<-read_excel(path)
#levels(as.factor(data$wwtp_name))
#Ignore 527918001 sample (Madison-P2-Central)
data<-data[which(!(data$sample_id %in% "527918001")), ]
# Replace all NA by real NA
#data <- data %>% mutate_all(na_if,"")
#####################
## Preparation
#####################
## Select only the Madison samples
data.MAD<-data[which(data$wwtp_name %like% "Madison*"), ]
## Interceptor correctly names?
#levels(as.factor(data.MAD$wwtp_name))
#No! Madison-P7-SE and P18-NE (04/15/2021)
# Rename WWTP
data.MAD$wwtp_name <- plyr::revalue(data.MAD$wwtp_name, c("Madison Metro" = "MC",
"Madison-P11-SW" = "P11",
"Madison-P18-NE" = "P18",
"Madison-P2-Central" = "P2",
"Madison-P7-SE" = "P7",
"Madison-P8-West" = "P8"))
## Reformat variables
data.MAD<-data.MAD[, c("wwtp_name", "average_flow_rate", "avg_sars_cov2_conc", "pmmov_conc", "bcov_rec_rate", "sample_collect_date")]
names(data.MAD)<-c("wwtp", "Flow", "SC2", "PMMoV", "BCoV", "date")
data.MAD$Flow<-as.numeric(as.character(data.MAD$Flow))
data.MAD$SC2<-as.numeric(as.character(data.MAD$SC2))
data.MAD$PMMoV<-as.numeric(as.character(data.MAD$PMMoV))
data.MAD$BCoV<-as.numeric(as.character(data.MAD$BCoV))
data.MAD$date<-format(as.Date((as.character(data.MAD$date)), origin = "1899-12-30"), "%m/%d/%Y")
#format(as.Date(as.numeric(as.character(data.MAD$date)), origin = "1899-12-30"), "%m/%d/%Y")
#as.Date(((data.MAD$date)), origin = "1899-12-30")
# Extract the sample and duplicate it to get one line for P7 and one line for P18
sample.row<-which(data.MAD$wwtp == "Madison-P7-SE and P18-NE")
sample.info<-data.MAD[sample.row, ]
sample.info<-rbind(sample.info, sample.info)
sample.info[, c("wwtp")]<-c("P7", "P18")
sample.info$Flow<-sample.info$Flow/2
sample.info$SC2<-sample.info$SC2/2
sample.info$PMMoV<-sample.info$PMMoV/2
sample.info$BCoV<-sample.info$BCoV/2
data.MAD<-rbind(data.MAD[-sample.row, ], sample.info)
# Create Pivot Table (PV) with selected data
data.MAD.melt<-reshape2::melt(data.MAD, id.var=c("wwtp", "date"))
data.MAD.melt$wwtp.var<-paste0(data.MAD.melt$variable, "_", data.MAD.melt$wwtp)
data.MAD.melt<-data.MAD.melt[, c("date", "wwtp.var", "value")]
#data.MAD.melt$wwtp.var <- sub('.', '', data.MAD.melt$wwtp.var)
data.MAD.cast<-reshape2::dcast(data.MAD.melt, date~wwtp.var)
data.MAD.PV<-data.MAD.cast[complete.cases(data.MAD.cast), ]
## Create the weekday of the sample collection
data.MAD.PV$day<-format(as.Date(data.MAD.PV$date, format="%m/%d/%Y"), "%A")
head(data.MAD.PV)
## date BCoV_MC BCoV_P11 BCoV_P18 BCoV_P2 BCoV_P7 BCoV_P8 Flow_MC
## 3 01/03/2022 6.321000 7.868000 6.13700 5.905000 7.05100 2.84300 32.88
## 10 01/11/2021 1.560000 0.760000 1.74000 1.790000 1.21000 1.92000 36.31
## 13 01/14/2021 6.280000 3.390000 5.07000 1.550000 4.45000 2.17000 36.30
## 15 01/18/2021 7.530346 3.984846 5.42708 9.709973 16.66528 10.58819 36.66
## 18 01/21/2021 27.310000 7.390000 10.89000 7.080000 4.08000 10.40000 37.39
## 20 01/25/2021 11.440000 8.990000 10.37000 11.380000 10.95000 11.63000 37.28
## Flow_P11 Flow_P18 Flow_P2 Flow_P7 Flow_P8 PMMoV_MC PMMoV_P11 PMMoV_P18
## 3 7.78 10.44 4.81 4.22 5.63 27321489 58326490 22346320
## 10 8.98 12.00 5.22 4.07 6.04 40077911 50800078 15894033
## 13 8.85 12.51 5.37 3.29 6.28 54139514 187180326 24929363
## 15 8.96 12.20 5.22 4.24 6.04 24412773 143617722 14666440
## 18 9.00 12.39 5.64 4.18 6.18 25553068 16804016 10786946
## 20 9.31 11.82 5.74 4.35 6.06 31732106 31434596 29445680
## PMMoV_P2 PMMoV_P7 PMMoV_P8 SC2_MC SC2_P11 SC2_P18 SC2_P2 SC2_P7 SC2_P8
## 3 28470941 24739939 20096307 2537828 3906323 3655762 2544628 2308027 1386309
## 10 69705020 39362943 28159113 228599 126852 326181 232896 159366 173468
## 13 42575828 28497639 37767522 183445 153016 425849 86007 78426 70093
## 15 40915958 31353171 18468196 197520 77624 261939 723151 164651 80951
## 18 22400070 11624454 19264438 201624 97251 400642 108210 77519 105763
## 20 35099462 22900169 22716404 184084 317237 378716 78421 85735 88608
## day
## 3 Monday
## 10 Monday
## 13 Thursday
## 15 Monday
## 18 Thursday
## 20 Monday
#format(as.Date((as.character(data.MAD$date)), origin = "1899-12-30"), "%m/%d/%Y")
#Comments On 4/15/21 P7 and P8 were pooled together before being processed. The flow were averaged between these two interceptors.
#####################
## Compute new variables
#####################
data.MAD.PV1 <-data.MAD.PV %>%
group_by(date, day) %>%
summarize(`Difference_Flow_MC:P` = round(Flow_P11+Flow_P18+Flow_P2+Flow_P7+Flow_P8 - Flow_MC, digits=4),
# Compute SARS-CoV-2 Mass Loading for MC and interceptors
MC_SC2_Mass = (3.785*1000000)*Flow_MC*SC2_MC,
P2_SC2_Mass = (3.785*1000000)*Flow_P2*SC2_P2,
P7_SC2_Mass = (3.785*1000000)*Flow_P7*SC2_P7,
P8_SC2_Mass = (3.785*1000000)*Flow_P8*SC2_P8,
P11_SC2_Mass = (3.785*1000000)*Flow_P11*SC2_P11,
P18_SC2_Mass = (3.785*1000000)*Flow_P18*SC2_P18,
# Compute PMMoV Mass Loading for MC and interceptors
MC_PMMoV_Mass = (3.785*1000000)*Flow_MC*PMMoV_MC,
P2_PMMoV_Mass = (3.785*1000000)*Flow_P2*PMMoV_P2,
P7_PMMoV_Mass = (3.785*1000000)*Flow_P7*PMMoV_P7,
P8_PMMoV_Mass = (3.785*1000000)*Flow_P8*PMMoV_P8,
P11_PMMoV_Mass = (3.785*1000000)*Flow_P11*PMMoV_P11,
P18_PMMoV_Mass = (3.785*1000000)*Flow_P18*PMMoV_P18,
.groups = 'drop')
data.MAD.PV2<-cbind(data.MAD.PV, data.MAD.PV1[, -c(1:2)])
# Compute the Ratio: SARS-CoV-2 Mass-MC/sum(Mass-interceptors)
data.MAD.PV2$`SC2_Mass_Ratio_MC:P`<-data.MAD.PV2$MC_SC2_Mass/(data.MAD.PV2$P2_SC2_Mass+data.MAD.PV2$P7_SC2_Mass+data.MAD.PV2$P8_SC2_Mass+data.MAD.PV2$P11_SC2_Mass+data.MAD.PV2$P18_SC2_Mass)
# Compute the Ratio: PMMoV Mass-MC/sum(Mass-interceptors)
data.MAD.PV2$`PMMoV_Mass_Ratio_MC:P`<-data.MAD.PV2$MC_PMMoV_Mass/(data.MAD.PV2$P2_PMMoV_Mass+data.MAD.PV2$P7_PMMoV_Mass+data.MAD.PV2$P8_PMMoV_Mass+data.MAD.PV2$P11_PMMoV_Mass+data.MAD.PV2$P18_PMMoV_Mass)
#####################
## Export
#####################
data.MAD.final<-data.MAD.PV2[, c("date", "day",
"Flow_MC", "Flow_P2", "Flow_P7", "Flow_P8", "Flow_P11", "Flow_P18", "Difference_Flow_MC:P",
"SC2_MC", "SC2_P2", "SC2_P7", "SC2_P8", "SC2_P11", "SC2_P18", "SC2_Mass_Ratio_MC:P",
"PMMoV_MC", "PMMoV_P2", "PMMoV_P7", "PMMoV_P8", "PMMoV_P11", "PMMoV_P18", "PMMoV_Mass_Ratio_MC:P",
"BCoV_MC", "BCoV_P2", "BCoV_P7", "BCoV_P8", "BCoV_P11", "BCoV_P18")]
#write.xlsx(data.MAD.final, paste0("K:\\WS\\MassBalance\\MassBalance_", #format(Sys.time(), '%Y%m%d'), ".xlsx"), overwrite = TRUE)
#Time series analysis
##Average Daily Flow No difference was found between the average daily flow of the Main Composite (MC) and the sum of the interceptors (Figure 1).
#####################
## Visualization
#####################
### Preparation
data.MAD.plot<-reshape2::melt( data.MAD.PV2,id.vars=c("date"), by=c("wwtp", "date"))
data.MAD.plot$sewershed<-ifelse(data.MAD.plot$variable %like% "MC", "MC", "P")
data.MAD.plot$interceptor<-ifelse(data.MAD.plot$variable %like% "P2", "P2",
ifelse(data.MAD.plot$variable %like% "P7", "P7",
ifelse(data.MAD.plot$variable %like% "P8", "P8",
ifelse(data.MAD.plot$variable %like% "P11", "P11",
ifelse(data.MAD.plot$variable %like% "P18", "P18", "MC")))))
data.MAD.plot$ratio<-ifelse(data.MAD.plot$variable %like% "Ratio", "Ratio", "NotRatio")
data.MAD.plot$variable <- plyr::revalue(data.MAD.plot$variable, c("Difference_Flow_MC:P" = "FlowDifference_MC:P",
"SC2_Mass_Ratio_MC:P" = "SC2Mass_Ratio_MC:P",
"PMMoV_Mass_Ratio_MC:P" = "PMMoVMass_Ratio_MC:P"))
data.MAD.plot$Ratio<-ifelse(data.MAD.plot$value < 0, "<0.0",
ifelse(data.MAD.plot$value < 0.5, "<0.5",
ifelse(data.MAD.plot$value < 1, "<1.0",
ifelse(data.MAD.plot$value < 1.5, "<1.5",
ifelse(data.MAD.plot$value < 2, "<2.0", ">=2.0")))))
data.MAD.plot$value <- as.numeric(data.MAD.plot$value)
## Warning: NAs introduced by coercion
data.MAD.plot$ratio.label<-ifelse(data.MAD.plot$value < 0 | as.numeric(data.MAD.plot$value) >= 2, toString(round(data.MAD.plot$value, digits=1)), "")
data.MAD.plot$Ratio <- ordered(data.MAD.plot$Ratio, levels = c(">=2.0", "<2.0", "<1.5", "<1.0", "<0.5", "<0.0"))
data.MAD.plot$Ratio.simp<-ifelse(data.MAD.plot$value < 0, "+/- >1",
ifelse(data.MAD.plot$value < 0.5, "+/- <1",
ifelse(data.MAD.plot$value < 1, "+/- =<0.5",
ifelse(data.MAD.plot$value <= 1.5, "+/- =<0.5",
ifelse(data.MAD.plot$value < 2, "+/- <1", "+/- >1")))))
data.MAD.plot$Ratio.simp <- ordered(data.MAD.plot$Ratio.simp, levels = c("+/- >1", "+/- <1", "+/- =<0.5"))
mc.flow<- data.MAD.plot %>% filter(sewershed=="MC" & variable %like% "Flow_")
p.flow<-data.MAD.plot %>% filter(sewershed=="P" & variable %like% "Flow_")
data.MAD.plot$Differences<-ifelse(data.MAD.plot$value != 0, round(data.MAD.plot$value, digits=1), "No difference")
Differences<-subset(data.MAD.plot, variable=="FlowDifference_MC:P")
Differences<-Differences[, c("date", "Differences")]
mc.flow<-as.data.frame(setDT(mc.flow)[Differences, on="date"])
mc.flow$Difference<-ifelse(mc.flow$Differences == "No difference", "No difference", "Difference")
mc.flow$diff.label<-ifelse(mc.flow$Differences == "No difference", "", mc.flow$Differences)
flow<-ggplot(data.MAD.plot, aes(x = date)) +
geom_col(aes(fill = interceptor, y=value), data = p.flow, width = 0.8) +
scale_fill_brewer(palette="Spectral") +
#scale_fill_manual(values=c("#00798c", "#d1495b", "#edae49", "#66a182", "#2e4057")) +
geom_point(aes(group = interceptor, y=value, color=Difference), data = mc.flow, size=3, show.legend = FALSE) +
scale_color_manual(values = c("black", "green")) +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
ylab("Average daily flow (MGD)") +
geom_hline(yintercept=median(mc.flow$value), linetype = "dashed", color="black") +
ggtitle("Average daily flow (MMSD)") +
geom_text(aes(label=diff.label, y=value), data = mc.flow, hjust=0, vjust=0.5, angle = 90, color="black", size = 3)
ggplotly(flow)
#SARS-CoV-2 Daily Mass Loading Overall, SARS-CoV-2 time series in Daily Mass Loading ratio between MC and the interceptors (Figure 2) did not reveal any clear temporal trend. I will have to double check the 1st date, as the SARS-CoV-2 seems abnormally high.
# SARS-CoV-2 (SC2)
mc.SC2<- data.MAD.plot %>% filter(sewershed=="MC" & variable %like% "_SC2_Mass")
p.SC2<-data.MAD.plot %>% filter(sewershed=="P" & variable %like% "_SC2_Mass")
r.SC2<-data.MAD.plot %>% filter(variable=="SC2Mass_Ratio_MC:P")
SC2.mass<-ggplot(data.MAD.plot, aes(x = date)) +
geom_col(aes(fill = interceptor, y=value), data = p.SC2, width = 0.8) +
scale_fill_brewer(palette="Spectral") +
geom_point(aes(group = interceptor, y=value), data = mc.SC2) +
theme_light() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank()) +
ylab("N1/N2 gene copies per day") +
geom_hline(yintercept=median(mc.SC2$value), color="red") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
SC2.ratio.1<-ggplot(data.MAD.plot %>% filter(variable=="SC2Mass_Ratio_MC:P"), aes(x = date, y=value, group=1)) +
geom_line() +
geom_point(aes(color = Ratio.simp), size = 1) +
theme_light() +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
#ylab("Ratio of MMSD composite to SUM of interceptors") +
geom_hline(yintercept=1, color="red") +
#ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")+
ylim(-1,3) +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
geom_text(aes(label=ratio.label, y=1), data = r.SC2, position = position_dodge(0.9), angle=90, hjust=-0.5, vjust=.5, color="black", size = 3) +
ylab("ratio")
gg0<-ggarrange(SC2.mass, SC2.ratio.1, heights = c(2, 0.8),
ncol = 1, nrow = 2, align = "v")
## Warning: Removed 1 rows containing missing values (geom_point).
gg0
#PMMoV Daily Mass Loading As for SARS-CoV-2, no clear temporal trend was observed in the PMMoV time series in Daily Mass Loading ratio between MC and the interceptors (Figure 3). However, larger number of date for which ratios are 1 +/- 0.5 (Green dots in Figure 3 bottom panel).3
# PMMoV
mc.PMMoV<- data.MAD.plot %>% filter(sewershed=="MC" & variable %like% "_PMMoV_Mass")
p.PMMoV<-data.MAD.plot %>% filter(sewershed=="P" & variable %like% "_PMMoV_Mass")
r.PMMoV<-data.MAD.plot %>% filter(variable=="PMMoVMass_Ratio_MC:P")
PMMoV.mass<-ggplot(data.MAD.plot, aes(x = date)) +
geom_col(aes(fill = interceptor, y=value), data = p.PMMoV, width = 0.8) +
scale_fill_brewer(palette="Spectral") +
geom_point(aes(group = interceptor, y=value), data = mc.PMMoV) +
theme_light() +
ylim(0,1.5e+16) +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank()) +
ylab("PMMoV gene copies per day") +
geom_hline(yintercept=median(mc.PMMoV$value), color="red") +
ggtitle("PMMoV, 24h Mass Loading (Madison)")
PMMoV.ratio.1<-ggplot(data.MAD.plot %>% filter(variable=="PMMoVMass_Ratio_MC:P"), aes(x = date, y=value, group=1)) +
geom_line() +
geom_point(aes(color = Ratio.simp), size = 1) +
theme_light() +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
#ylab("Ratio of MMSD composite to SUM of interceptors") +
geom_hline(yintercept=1, color="red") +
#ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")+
ylim(-1,3) +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
geom_text(aes(label=ratio.label, y=1), data = r.PMMoV, position = position_dodge(0.9), angle=90, hjust=-0.5, vjust=.5, color="black", size = 3) +
ylab("ratio")
gg1<-ggarrange(PMMoV.mass, PMMoV.ratio.1, heights = c(2, 0.8),
ncol = 1, nrow = 2, align = "v")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
gg1
#Daily Mass Loading: SARS-CoV-2 vs PMMoV The difference of ratio MC:P for SARS-CoV-2 does not correlate with the difference of ratio MC:P for PMMoV (Figure 4).
# Add Flow difference to data.MAD.PV2
data.MAD.plot2<-setDT(data.MAD.PV2[, c("date", "Difference_Flow_MC:P", "BCoV_MC", "Flow_MC", "BCoV_P2", "BCoV_P7", "BCoV_P8", "BCoV_P11", "BCoV_P18")])[data.MAD.plot, on="date"]
data.MAD.plot2$`Ratio BCoV`<-data.MAD.plot2$BCoV_MC/((data.MAD.plot2$BCoV_P2+data.MAD.plot2$BCoV_P7+data.MAD.plot2$BCoV_P8+data.MAD.plot2$BCoV_P11+data.MAD.plot2$BCoV_P18)/5)
data.MAD.plot2$variable <- plyr::revalue(data.MAD.plot2$variable, c("SC2Mass_Ratio_MC:P" = "SARS-CoV-2",
"PMMoVMass_Ratio_MC:P" = "PMMoV"))
# Preparation
data.lm<-data.MAD.PV2
fit <- lm(`SC2_Mass_Ratio_MC:P` ~ `PMMoV_Mass_Ratio_MC:P`, data =data.lm)
yrng <- range(data.lm$`SC2_Mass_Ratio_MC:P`)
xrng <- range(data.lm$`PMMoV_Mass_Ratio_MC:P`)
#Plot
f4<-ggplot(data.MAD.PV2, aes(x = `PMMoV_Mass_Ratio_MC:P`, y=`SC2_Mass_Ratio_MC:P`)) +
geom_point(stat = "identity") +
theme_light() +
xlim(0,20) +
ylim(0,20) +
labs(subtitle = paste0("Linear model: Adj R2 = ", round(signif(summary(fit)$adj.r.squared, 5), digits=4),
"; P = ", round(signif(summary(fit)$coef[2,4], 5), digits=3))) +
ylab("SARS-CoV-2 Mass Ratio MC:P") +
xlab("PMMoV Mass Ratio MC:P")
ggplotly(f4)
f5<-ggplot(data.MAD.plot2 %>% filter(variable %in% c("SARS-CoV-2", "PMMoV")), aes(x=variable, y=value)) +
geom_boxplot(width=0.5, outlier.alpha = 0) +
geom_jitter(aes(color=Flow_MC), width=0.2) +
theme_light() +
geom_hline(yintercept=1, color="red") +
ylab("Ratio MC:P") +
theme(axis.title.x = element_blank())
ggplotly(f5)
f6<-ggplot(data.MAD.plot2 %>% filter(variable %in% c("SARS-CoV-2", "PMMoV")), aes(x=variable, y=value)) +
geom_boxplot(width=0.5, outlier.alpha = 0) +
geom_jitter(aes(color=`BCoV_MC`), width=0.2) +
theme_light() +
geom_hline(yintercept=1, color="red") +
ylab("Ratio MC:P") +
theme(axis.title.x = element_blank())
ggplotly(f6)
f7<-ggplot(data.MAD.plot2 %>% filter(variable %in% c("SARS-CoV-2", "PMMoV")), aes(x=variable, y=value)) +
geom_boxplot(width=0.5, outlier.alpha = 0) +
geom_jitter(aes(color=`Ratio BCoV`), width=0.2) +
theme_light() +
geom_hline(yintercept=1, color="red") +
ylab("Ratio MC:P") +
theme(axis.title.x = element_blank())
ggplotly(f7)
#Normalization
##Average daily flow The flow per interceptor was normalized to obtain a sum of flows of 100%. Based on Figure 8, it is unclear that high mass load-ratios between MC and the interceptors are associated with difference of flow pattern. One exception could be for SARS-CoV-2 on 4-15-21.
# Preparation
flow.100<-as.data.frame(cbind((data.MAD.PV2$Flow_P2*100)/data.MAD.PV2$Flow_MC,
(data.MAD.PV2$Flow_P7*100)/data.MAD.PV2$Flow_MC,
(data.MAD.PV2$Flow_P8*100)/data.MAD.PV2$Flow_MC,
(data.MAD.PV2$Flow_P11*100)/data.MAD.PV2$Flow_MC,
(data.MAD.PV2$Flow_P18*100)/data.MAD.PV2$Flow_MC,
data.MAD.PV2$date))
names(flow.100)<-c("P2", "P7", "P8", "P11", "P18", "date")
flow.100$P2<-as.numeric(as.character(flow.100$P2))
flow.100$P7<-as.numeric(as.character(flow.100$P7))
flow.100$P8<-as.numeric(as.character(flow.100$P8))
flow.100$P11<-as.numeric(as.character(flow.100$P11))
flow.100$P18<-as.numeric(as.character(flow.100$P18))
flow.100.melt<-reshape2::melt(flow.100, id.var=c("date"))
# Plot
flow.100.plot<-ggplot(flow.100.melt, aes(x = date, y=value)) +
geom_col(aes(fill = variable), width = 0.8) +
scale_fill_brewer(palette="Spectral") +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
ylab("Average daily flow (%)") +
ggtitle("Average daily flow distribution between interceptors") +
geom_point(aes(color = Ratio.simp, y=-5, x=date), data = data.MAD.plot %>% filter(variable=="SC2Mass_Ratio_MC:P"), show.legend = FALSE) +
geom_point(aes(color = Ratio.simp, y=-10), data = data.MAD.plot %>% filter(variable=="PMMoVMass_Ratio_MC:P"), show.legend = FALSE) +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c"))
ggplotly(flow.100.plot)
#SARS-CoV-2 There is a clear distinction in the distribution of the SARS-CoV-2 signal between the interceptors before and after the end of July 2021, matching with the spike of Delta in the population? (Figure 9 and 10). Overall, there is no clear visual cue showing that the over-representation of an interceptor is “systematically” associated with the high MC:P ratios. Looking at the values, and not the graph could maybe be better to extract trends… Figure 10 (which must be redundant with Figure 2), is the same than Figure 9, but the mass load interceptors were normalized to the MC mass load (not the sum of the interceptors). In theory, the sum of the stacked bar should be closed to 100%. As for Figure 9, there is no clear visual cue showing that the over-representation of an interceptor is associated with high MC:P ratios.
# Preparation
data.MAD.PV2$SC2_Ptot_Mass<-data.MAD.PV2$P2_SC2_Mass+data.MAD.PV2$P7_SC2_Mass+data.MAD.PV2$P8_SC2_Mass+data.MAD.PV2$P11_SC2_Mass+data.MAD.PV2$P18_SC2_Mass
SC2.100<-as.data.frame(cbind((data.MAD.PV2$P2_SC2_Mass)*100/data.MAD.PV2$SC2_Ptot_Mass,
(data.MAD.PV2$P7_SC2_Mass)*100/data.MAD.PV2$SC2_Ptot_Mass,
(data.MAD.PV2$P8_SC2_Mass)*100/data.MAD.PV2$SC2_Ptot_Mass,
(data.MAD.PV2$P11_SC2_Mass)*100/data.MAD.PV2$SC2_Ptot_Mass,
(data.MAD.PV2$P18_SC2_Mass)*100/data.MAD.PV2$SC2_Ptot_Mass,
data.MAD.PV2$date))
names(SC2.100)<-c("P2", "P7", "P8", "P11", "P18", "date")
SC2.100$P2<-as.numeric(as.character(SC2.100$P2))
SC2.100$P7<-as.numeric(as.character(SC2.100$P7))
SC2.100$P8<-as.numeric(as.character(SC2.100$P8))
SC2.100$P11<-as.numeric(as.character(SC2.100$P11))
SC2.100$P18<-as.numeric(as.character(SC2.100$P18))
SC2.100.melt<-reshape2::melt(SC2.100, id.var=c("date"))
# Plot
SC2.100.plot<-ggplot(SC2.100.melt, aes(x = date, y=value)) +
geom_col(aes(fill = variable), width = 0.8) +
scale_fill_brewer(palette="Spectral") +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
ylab("SARS-CoV-2 signal (%)") +
ggtitle("SARS-CoV-2 mass load distribution between interceptors")+
geom_point(aes(color = Ratio.simp, y=-5), data = data.MAD.plot %>% filter(variable=="SC2Mass_Ratio_MC:P"), show.legend = FALSE) +
geom_point(aes(color = Ratio.simp, y=-10), data = data.MAD.plot %>% filter(variable=="PMMoVMass_Ratio_MC:P"), show.legend = FALSE) +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c"))
ggplotly(SC2.100.plot)
# Preparation
SC2.100.MC<-as.data.frame(cbind((data.MAD.PV2$P2_SC2_Mass)*100/data.MAD.PV2$MC_SC2_Mass,
(data.MAD.PV2$P7_SC2_Mass)*100/data.MAD.PV2$MC_SC2_Mass,
(data.MAD.PV2$P8_SC2_Mass)*100/data.MAD.PV2$MC_SC2_Mass,
(data.MAD.PV2$P11_SC2_Mass)*100/data.MAD.PV2$MC_SC2_Mass,
(data.MAD.PV2$P18_SC2_Mass)*100/data.MAD.PV2$MC_SC2_Mass,
data.MAD.PV2$date))
names(SC2.100.MC)<-c("P2", "P7", "P8", "P11", "P18", "date")
SC2.100.MC$P2<-as.numeric(as.character(SC2.100.MC$P2))
SC2.100.MC$P7<-as.numeric(as.character(SC2.100.MC$P7))
SC2.100.MC$P8<-as.numeric(as.character(SC2.100.MC$P8))
SC2.100.MC$P11<-as.numeric(as.character(SC2.100.MC$P11))
SC2.100.MC$P18<-as.numeric(as.character(SC2.100.MC$P18))
SC2.100.MC.melt<-reshape2::melt(SC2.100.MC, id.var=c("date"))
# Plot
SC2.100.MC.plot<-ggplot(SC2.100.MC.melt, aes(x = date, y=value)) +
geom_col(aes(fill = variable), width = 0.8) +
scale_fill_brewer(palette="Spectral") +
theme(axis.text.x = element_text(angle = 90, size = 4, hjust=1),
axis.title.x = element_blank()) +
ylab("SARS-CoV-2 signal (%)") +
ggtitle("Distribution of MC-SARS-CoV-2 mass load signal across interceptors")+
geom_point(aes(color = Ratio.simp, y=-50), data = data.MAD.plot %>% filter(variable=="SC2Mass_Ratio_MC:P"), show.legend = FALSE) +
geom_point(aes(color = Ratio.simp, y=-100), data = data.MAD.plot %>% filter(variable=="PMMoVMass_Ratio_MC:P"), show.legend = FALSE) +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c"))
ggplotly(SC2.100.MC.plot)